In this note, we study how to construct, or recreate, the charts presented in the World Inequality Report 2022. We focus on the Executive Summary; however, we hope that you can study the report in more detail using the original data provided on the site.
There is an R package to make it easier to download the data.
library(tidyverse)
── Attaching packages ───────────────────────────────────────────────────────── tidyverse 1.3.2 ──✔ ggplot2 3.4.0 ✔ purrr 1.0.0
✔ tibble 3.1.8 ✔ dplyr 1.0.10
✔ tidyr 1.2.1 ✔ stringr 1.5.0
✔ readr 2.1.3 ✔ forcats 0.5.2 ── Conflicts ──────────────────────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
library(readxl)
library(DT)
# Location of the Executive Summary workbook (tables and figures).
url_summary <- "https://wir2022.wid.world/www-site/uploads/2022/03/WIR2022TablesFigures-Summary.xlsx"
# download.file() does not create missing directories, so make sure data/ exists.
dir.create("data", showWarnings = FALSE, recursive = TRUE)
# mode = "wb": an .xlsx workbook is binary. The default text mode rewrites line
# endings on Windows and leaves a file that read_excel() cannot open.
download.file(
  url = url_summary,
  destfile = "data/WIR2022TablesFigures-Summary.xlsx",
  mode = "wb"
)
summary_sheets <- excel_sheets("data/WIR2022TablesFigures-Summary.xlsx")
summary_sheets
[1] "Index" "F1" "F2" "F3" "F4" "F5." "F6"
[8] "F7" "F8" "F9" "F10" "F11" "F12" "F13"
[15] "F14" "F15" "T1" "data-F1" "data-F2" "data-F3" "data-F4"
[22] "data-F5" "data-F6" "data-F7" "data-F8" "data-F9" "data-F10" "data-F11"
[29] "data-F12" "data-F13." "data-F14." "data-F15"
df_index <- read_excel("data/WIR2022TablesFigures-Summary.xlsx", sheet = "Index")
df_index
df_f1 <- read_excel("data/WIR2022TablesFigures-Summary.xlsx", sheet = "data-F1")
New names:
df_f1
# Figure 1: dodged bars of income/wealth shares with a percent label per bar.
# The first (unnamed) column of the sheet is read as `...1` and becomes `cat`;
# columns 2-4 are stacked into group/value pairs.
df_f1 %>%
  select(cat = ...1, 2:4) %>%
  pivot_longer(2:4, names_to = "group", values_to = "value") %>%
  ggplot(aes(x = cat, y = value, fill = group)) +
  geom_col(position = "dodge") +
  # x and y are inherited from ggplot(); `group` keeps each label over its own bar.
  geom_text(
    aes(group = group, label = scales::label_percent(accuracy = 1)(value)),
    position = position_dodge(width = 0.9)
  ) +
  scale_y_continuous(labels = scales::label_percent(accuracy = 1)) +
  labs(
    title = "Figure 1. Global income and wealth inequality, 2021",
    x = "",
    y = "Share of total income or wealth",
    fill = ""
  )
Interpretation: The global bottom 50% captures 8.5%
of total income measured at Purchasing Power Parity (PPP). The global
bottom 50% owns 2% of wealth (at Purchasing Power Parity). The global
top 10% owns 76% of total Household wealth and captures 52% of total
income in 2021. Note that top wealth holders are not necessarily top
income holders. Incomes are measured after the operation of pension and
unemployment systems and before taxes and transfers.
Sources and series: wir2022.wid.world/methodology.
pivot_longer.
df_f1 %>% select(cat = ...1, 2:4) %>%
pivot_longer(2:4, names_to = "group", values_to = "value")
Now we use ggplot2 to draw a chart.
df_f1 %>% select(cat = ...1, 2:4) %>%
pivot_longer(2:4, names_to = "level", values_to = "value") %>%
ggplot(aes(x = cat, y = value, fill = level)) +
geom_col()
position = dodge.
df_f1 %>% select(cat = ...1, 2:4) %>%
pivot_longer(2:4, names_to = "group", values_to = "value") %>%
ggplot(aes(x = cat, y = value, fill = group)) +
geom_col(position = "dodge")
df_f1 %>% select(cat = ...1, 2:4) %>%
pivot_longer(2:4, names_to = "group", values_to = "value") %>%
ggplot(aes(x = cat, y = value, fill = group)) +
geom_col(position = "dodge") +
scale_y_continuous(labels = scales::percent_format(accuracy = 1))
df_f1 %>% select(cat = ...1, 2:4) %>%
pivot_longer(2:4, names_to = "group", values_to = "value") %>%
ggplot(aes(x = cat, y = value, fill = group)) +
geom_col(position = "dodge") +
scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
labs(title = "Figure 1. Global income and wealth inequality, 2021",
x = "", y = "Share of total income or wealth", fill = "")
df_f1 %>% select(cat = ...1, 2:4) %>%
pivot_longer(2:4, names_to = "group", values_to = "value") %>%
ggplot(aes(x = cat, y = value, fill = group)) +
geom_col(position = "dodge") +
scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
geom_text(aes(x = cat, y = value, group = group, label = scales::label_percent(accuracy=1)(value)),
position = position_dodge(width = 0.9)) +
labs(title = "Figure 1. Global income and wealth inequality, 2021",
x = "", y = "Share of total income or wealth", fill = "")
df_f3 <- read_excel("data/WIR2022TablesFigures-Summary.xlsx", sheet = "data-F3")
df_f3
# World polygons whose `region` names are rewritten to the country names used
# in df_f3. The renames are applied one after another in the order listed, so
# "Ivory Coast" first becomes "Cote dIvoire" and later "Cote d'Ivoire".
map0 <- local({
  world <- map_data("world")
  renames <- c(
    "Democratic Republic of the Congo" = "DR Congo",
    "Republic of Congo" = "Congo",
    "Ivory Coast" = "Cote dIvoire",
    "Vietnam" = "Viet Nam",
    "Russia" = "Russian Federation",
    "South Korea" = "Korea",
    "UK" = "United Kingdom",
    "Brunei" = "Brunei Darussalam",
    "Laos" = "Lao PDR",
    "Cote dIvoire" = "Cote d'Ivoire",
    "Cape Verde" = "Cabo Verde",
    "Syria" = "Syrian Arab Republic",
    "Trinidad" = "Trinidad and Tobago",
    "Tobago" = "Trinidad and Tobago"
  )
  for (old_name in names(renames)) {
    world$region[world$region == old_name] <- renames[[old_name]]
  }
  world
})
# Figure 3: choropleth of the top 10 / bottom 50 income ratio, binned into
# five classes. Countries are matched to polygons by name (Country == region).
df_f3 %>%
  mutate(
    `Top 10 Bottom 50 Ratio` = cut(
      T10B50,
      breaks = c(5, 12, 13, 16, 19, 140),
      include.lowest = FALSE
    )
  ) %>%
  ggplot(aes(map_id = Country)) +
  geom_map(aes(fill = `Top 10 Bottom 50 Ratio`), map = map0) +
  # Take the plot extent from map0, the same data frame that draws the polygons
  # (previously this read world_map, an object this chunk never creates).
  expand_limits(x = map0$long, y = map0$lat) +
  labs(
    title = "Figure 3. Top 10/Bottom 50 income gaps across the world, 2021",
    x = "", y = "", fill = "Top 10/Bottom 50 ratio"
  ) +
  theme(
    legend.position = "bottom",
    axis.text.x = element_blank(), axis.ticks.x = element_blank(),
    axis.text.y = element_blank(), axis.ticks.y = element_blank()
  ) +
  scale_fill_brewer(palette = "YlOrRd")
Interpretation: In Brazil, the bottom 50% earns 29 times less than the top 10%. The value is 7 in France. Income is measured after pension and unemployment payments and benefits received by individuals but before other taxes they pay and transfers they receive. Source and series: wir2022.wid.world/methodology.
df_f3 <- read_excel("data/WIR2022TablesFigures-Summary.xlsx", sheet = "data-F3")
df_f3
We use world_map data. Let us look at the data
first.
datatable(world_map)
Warning: It seems your data is too big for client-side DataTables. You may consider server-side processing: https://rstudio.github.io/DT/server.htmlWarning: It seems your data is too big for client-side DataTables. You may consider server-side processing: https://rstudio.github.io/DT/server.html
Let us try using the standard template of ggplot. We use
Country for map_id and T10B50 for the
numerical data. Since the world_map data contains
long and lat for each region, we pass them to
expand_limits.
df_f3 %>%
ggplot(aes(map_id = Country)) +
geom_map(aes(fill = `T10B50`), map = world_map) +
expand_limits(x = world_map$long, y = world_map$lat)
We observe a couple of issues by comparing with the image of the original.
F3_world_map
The region name and the country name may be different in two datasets
world_map and df_f3. Let us take care of the
first issue.
In order to search names, we use DT::datatable, i.e.,
datatable in DT package.
datatable(df_f3)
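Searching for ‘russia’ and ‘congo’, we find that three names differ between the two datasets (name in world_map → name in df_f3):
"Russia" → "Russian Federation"
"Democratic Republic of the Congo" → "DR Congo"
"Republic of Congo" → "Congo"
df_f3_rev <- df_f3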
df_f3_rev$Country[df_f3_rev$Country == "Russian Federation"] <- "Russia"
df_f3_rev$Country[df_f3_rev$Country == "DR Congo"] <- "Democratic Republic of the Congo"
df_f3_rev$Country[df_f3_rev$Country == "Congo"] <- "Republic of Congo"
Now check the country names again using anti_join. By
the code below, we can create a new table such that there is no region
in world_map corresponding to Country in
df_f3_rev.
df_f3_rev %>% anti_join(world_map, by = c("Country" = "region"))
We can proceed one by one. However, WIR provides the code of this
part in R. So let us use it. It is in Computer Codes at the Methodology
site. Download
‘Full Datasets’ and ‘Computer Codes’. Then in WIR2022 - Computer codes,
find Chapter1_Maps.R.
map<-map_data("world")
map$region[map$region=="Democratic Republic of the Congo"]<-"DR Congo"
map$region[map$region=="Republic of Congo"]<-"Congo"
map$region[map$region=="Ivory Coast"]<-"Cote dIvoire"
map$region[map$region=="Vietnam"]<-"Viet Nam"
# map$region[map$region=="United Arab Emirates"]<-"UAE"
The last line, for the UAE, seems to be wrong, so it is commented out.
Since the data file used in the next line was not found, let me use
map for now.
# Attach ISO country codes to the map polygons, then fill in the codes for
# regions whose names did not match during the join.
# read_dta() lives in haven, which library(tidyverse) does not attach, so the
# call is namespace-qualified.
# NOTE(review): assumes index_region.dta provides `name_region` and `ISO`
# columns; confirm against the WIR2022 computer codes.
index_region2 <- haven::read_dta("index_region.dta")
map <- left_join(map, index_region2, by = c("region" = "name_region"))
map$ISO[map$region == "Greenland"] <- "GL"
# The rename to "UAE" is commented out earlier in this note, so the original
# map_data() name must be matched as well.
map$ISO[map$region %in% c("UAE", "United Arab Emirates")] <- "AE"
# Brunei is BN in ISO 3166-1 alpha-2 ("BR" is Brazil).
map$ISO[map$region == "Brunei"] <- "BN"
map$ISO[map$region == "Antigua"] <- "AG"
map$ISO[map$region == "Cape Verde"] <- "CV"
map$ISO[map$region == "Cote dIvoire"] <- "CI"
map$ISO[map$region == "UK"] <- "GB"
map$ISO[map$region == "Canary Islands"] <- "ES"
map$ISO[map$region == "French Guiana"] <- "FR"
map$ISO[map$region == "Saint Kitts"] <- "KN"
map$ISO[map$region == "South Korea"] <- "KR"
map$ISO[map$region == "Saint Martin"] <- "MF"
map$ISO[map$region == "Macedonia"] <- "MK"
map$ISO[map$region == "Russia"] <- "RU"
map$ISO[map$region == "Bonaire"] <- "BQ"
map$ISO[map$region == "Sint Eustatius"] <- "BQ"
map$ISO[map$region == "Saba"] <- "BQ"
map$ISO[map$region == "Laos"] <- "LA"
map$ISO[map$region == "Sint Maarten"] <- "SX"
map$ISO[map$region == "Syria"] <- "SY"
map$ISO[map$region == "Trinidad"] <- "TT"
map$ISO[map$region == "Tobago"] <- "TT"
map$ISO[map$region == "Virgin Islands"] <- "VI"
map$ISO[map$region == "Saint Vincent"] <- "VC"
map$ISO[map$region == "Grenadines"] <- "VC"
map$ISO[map$region == "French Southern and Antarctic Lands"] <- "FR"
# Western Sahara is EH in ISO 3166-1 alpha-2 ("WS" is Samoa).
map$ISO[map$region == "Western Sahara"] <- "EH"
map$region[map$region=="Russia"]<-"Russian Federation"
map$region[map$region=="South Korea"]<-"Korea"
map$region[map$region=="UK"]<-"United Kingdom"
map$region[map$region=="Brunei"]<-"Brunei Darussalam"
map$region[map$region=="Laos"]<-"Lao PDR"
map$region[map$region=="Cote dIvoire"]<-"Cote d'Ivoire"
map$region[map$region=="Cape Verde"]<- "Cabo Verde"
map$region[map$region=="Syria"]<- "Syrian Arab Republic"
map$region[map$region=="Trinidad"]<- "Trinidad and Tobago"
map$region[map$region=="Tobago"]<- "Trinidad and Tobago"
df_f3 %>% anti_join(map, by = c("Country" = "region"))
Is Zanzibar a part of Tanzania?
df_f3 %>%
ggplot(aes(map_id = Country)) +
geom_map(aes(fill = `T10B50`), map = map) +
expand_limits(x = map$long, y = map$lat)
Next we add a new column Top 10 Bottom 50 Ratio by
setting new breaks of T10B50. Place the legend at the
bottom using theme(legend.position="bottom").
# Bin T10B50 into classes and move the legend to the bottom.
# Uses df_f3 with `map`, whose region names were rewritten above to the names
# used in df_f3. df_f3_rev only reconciles three names with the unmodified
# world map, which leaves every other mismatched country unfilled.
df_f3 %>%
  mutate(
    `Top 10 Bottom 50 Ratio` = cut(
      T10B50,
      breaks = c(5, 12, 13, 16, 19, 140),
      include.lowest = FALSE
    )
  ) %>%
  ggplot(aes(map_id = Country)) +
  geom_map(aes(fill = `Top 10 Bottom 50 Ratio`), map = map) +
  expand_limits(x = map$long, y = map$lat) +
  theme(legend.position = "bottom")
Finally add the title, remove x and y labels, and change the legend name.
# Same map as the previous step, now with title, empty axis labels and a
# renamed legend.
# Uses df_f3 with `map`, whose region names were rewritten above to the names
# used in df_f3. df_f3_rev only reconciles three names with the unmodified
# world map, which leaves every other mismatched country unfilled.
df_f3 %>%
  mutate(
    `Top 10 Bottom 50 Ratio` = cut(
      T10B50,
      breaks = c(5, 12, 13, 16, 19, 140),
      include.lowest = FALSE
    )
  ) %>%
  ggplot(aes(map_id = Country)) +
  geom_map(aes(fill = `Top 10 Bottom 50 Ratio`), map = map) +
  expand_limits(x = map$long, y = map$lat) +
  labs(
    title = "Figure 3. Top 10/Bottom 50 income gaps across the world, 2021",
    x = "", y = "", fill = "Top 10/Bottom 50 ratio"
  ) +
  theme(legend.position = "bottom")
Remove x-axis, y-axis and ticks. If you want to change color palette, see:
# Final version of Figure 3: axis text and ticks removed, YlOrRd palette.
# Uses df_f3 with `map`, whose region names were rewritten above to the names
# used in df_f3. df_f3_rev only reconciles three names with the unmodified
# world map, which leaves every other mismatched country unfilled.
df_f3 %>%
  mutate(
    `Top 10 Bottom 50 Ratio` = cut(
      T10B50,
      breaks = c(5, 12, 13, 16, 19, 140),
      include.lowest = FALSE
    )
  ) %>%
  ggplot(aes(map_id = Country)) +
  geom_map(aes(fill = `Top 10 Bottom 50 Ratio`), map = map) +
  expand_limits(x = map$long, y = map$lat) +
  labs(
    title = "Figure 3. Top 10/Bottom 50 income gaps across the world, 2021",
    x = "", y = "", fill = "Top 10/Bottom 50 ratio"
  ) +
  theme(
    legend.position = "bottom",
    axis.text.x = element_blank(), axis.ticks.x = element_blank(),
    axis.text.y = element_blank(), axis.ticks.y = element_blank()
  ) +
  scale_fill_brewer(palette = "YlOrRd")
df_f4 <- read_excel("data/WIR2022TablesFigures-Summary.xlsx", sheet = "data-F4")
df_f4
df_f4 %>% pivot_longer(3:5, names_to = "level", values_to = "value") %>%
ggplot(aes(x = iso, y = value, fill = level)) +
geom_col(position = "dodge") +
scale_x_discrete(labels = function(x) stringr::str_wrap(x, width = 10)) +
scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
labs(title = "Figure 4. The extreme concentration of capital: \nwealth inequality across the world, 2021",
x = "", y = "Share of national wealth (%)", fill = "")
Interpretation: The Top 10% in Latin America
captures 77% of total household wealth, versus 22% for the Middle 40%
and 1% for the Bottom 50%. In Europe, the Top 10% owns 58% of total
wealth, versus 38% for the Middle 40% and 4% for the Bottom 50%.
Sources and series: wir2022.wid.world/methodology.
Almost the same as F1 and F2.
scale_x_discrete(labels = function(x) stringr::str_wrap(x, width = 10))
scale_y_continuous(labels = scales::percent_format(accuracy = 1))
df_f5 <- read_excel("data/WIR2022TablesFigures-Summary.xlsx", sheet = "data-F5")
df_f5
# Figure 5: smoothed T10/B50 ratio over time, y-axis limited to 10-70, with
# four wrapped text call-outs placed at fixed (x, y) positions.
df_f5 %>%
  select(year = y, ratio = t10b50) %>%
  ggplot(aes(x = year, y = ratio)) +
  stat_smooth(span = 0.25, se = FALSE) +
  lims(y = c(10, 70)) +
  scale_x_continuous(breaks = seq(1820, 2020, by = 20)) +
  labs(
    title = "Figure 5. Global income inequality:T10/B50 ratio, 1820-2020",
    x = "",
    y = stringr::str_wrap(
      "Ratio of top 10% average income to bottom 50% average income",
      width = 35
    )
  ) +
  # One annotate() call draws all four notes; x, y and label are matched by position.
  annotate(
    "text",
    x = c(1840, 1910, 1980, 2010),
    y = c(32, 49, 60, 32),
    label = stringr::str_wrap(
      c(
        "1820: average income of the global top 10% is 18x higher than average income of the bottom 50%",
        "1910: average income of the global top 10% is 41x higher than average income of the bottom 50%",
        "1980: average income of the global top 10% is 53x higher than average income of the bottom 50%",
        "2020: average income of the global top 10% is 38x higher than average income of the bottom 50%"
      ),
      width = 20
    ),
    size = 3
  )
Interpretation. Global inequality, as measured by
the ratio T10/B50 between the average income of the top 10% and the
average income of the bottom 50%, more than doubled between 1820
and 1910, from less than 20 to about 40, and stabilized around 40
between 1910 and 2020. It is too early to say whether the decline in
global inequality observed since 2008 will continue. Income is measured
per capita after pension and unemployment insurance transfers and
before income and wealth taxes.
Sources and series: wir2022.wid.world/methodology and
Chancel and Piketty (2021).
The following is enough. Here we applied str_wrap to the
label of y-axis as it is very long.
df_f5 %>% select(year = y, ratio = t10b50) %>%
ggplot(aes(x = year, y = ratio)) +
geom_line() +
labs(title = "Figure 5. Global income inequality:T10/B50 ratio, 1820-2020",
x = "", y = stringr::str_wrap("Ratio of top 10% average income to bottom 50% average income", width = 35))
There are many ways of smoothing.
Line Plot and LOESS
df_f5 %>% select(year = y, ratio = t10b50) %>%
ggplot(aes(x = year, y = ratio)) +
geom_line() +
geom_smooth(method = "loess", se = FALSE) +
labs(title = "Figure 5. Global income inequality:",
subtitle = "T10/B50 ratio, 1820-2020",
x = "", y = "Ratio of top 10% average income to bottom 50% average income")
GAM Smoothing with 24 Points
df_f5 %>% select(year = y, ratio = t10b50) %>%
ggplot(aes(x = year, y = ratio)) +
stat_smooth(method = "gam", formula = y ~ s(x, k = 24), se = FALSE) +
scale_x_continuous(breaks = round(seq(min(df_f5$y), max(df_f5$y), by = 20),1)) +
labs(title = "Figure 5. Global income inequality:T10/B50 ratio, 1820-2020",
x = "", y = stringr::str_wrap("Ratio of top 10% average income to bottom 50% average income", width = 35))
Polynomial Approximation of Degree 6
df_f5 %>% select(year = y, ratio = t10b50) %>%
ggplot(aes(x = year, y = ratio)) +
geom_point() +
geom_smooth(method = "lm", formula = y ~ poly(x, 6), se = FALSE) +
labs(title = "Figure 5. Global income inequality:",
subtitle = "T10/B50 ratio, 1820-2020",
x = "", y = stringr::str_wrap("Ratio of top 10% average income to bottom 50% average income", width = 35))
In the main chart for F5, we applied
stat_smooth(span = 0.25, se = FALSE) as it is easy. You can
adjust smoothness by changing the value for span.
For the y-axis, following the output provided,
lims(y = c(10,70)) is added, together with the annotations.
For a long text use:
stringr::str_wrap("long text", width = size) and
annotate with size = fontsize.
ggforce::geom_mark_rect will add annotation in a
box.
df_f6 <- read_excel("data/WIR2022TablesFigures-Summary.xlsx", sheet = "data-F6")
New names:
df_f6
df_f6 %>% select(year = "...1", 2:3) %>%
pivot_longer(cols = 2:3, names_to = "type", values_to = "value") %>%
mutate(types = factor(type, levels = c("Within-country inequality", "Between-country inequality"))) %>%
ggplot(aes(x = year, y = value, fill = types)) +
geom_area() +
scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
scale_x_continuous(breaks = round(seq(1820, 2020, by = 20),1)) +
scale_fill_manual(values=rev(scales::hue_pal()(2)), labels = function(x) str_wrap(x, width = 15)) +
labs(title = "Figure 6. Global income inequality: \nBetween vs. within country inequality (Theil index), 1820-2020",
x = "", y = "Share of global inequality (% of total Theil index)", fill = "") +
annotate("text", x = 1850, y = 0.28, label = stringr::str_wrap("1820: Between country inequality represents 11% of global inequality", width = 20), size = 3) +
annotate("text", x = 1980, y = 0.70, label = stringr::str_wrap("1980: Between country inequality represents 57% of global inequality", width = 20), size = 3) +
annotate("text", x = 1990, y = 0.30, label = stringr::str_wrap("2020: Between country inequality represents 32% of global inequality", width = 20), size = 3)
Legend can be placed at the bottom:
theme(legend.position="bottom")
df_f6 %>% select(year = "...1", 2:3) %>%
pivot_longer(cols = 2:3, names_to = "type", values_to = "value") %>%
mutate(types = factor(type, levels = c("Within-country inequality", "Between-country inequality"))) %>%
ggplot(aes(x = year, y = value, fill = types)) +
geom_area() +
scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
scale_x_continuous(breaks = round(seq(1820, 2020, by = 20),1)) +
scale_fill_manual(values=rev(scales::hue_pal()(2))) +
labs(title = "Figure 6. Global income inequality: \nBetween vs. within country inequality (Theil index), 1820-2020",
x = "", y = "Share of global inequality (% of total Theil index)", fill = "") +
annotate("text", x = 1850, y = 0.28, label = stringr::str_wrap("1820: Between country inequality represents 11% of global inequality", width = 20), size = 3) +
annotate("text", x = 1980, y = 0.70, label = stringr::str_wrap("1980: Between country inequality represents 57% of global inequality", width = 20), size = 3) +
annotate("text", x = 1990, y = 0.30, label = stringr::str_wrap("2020: Between country inequality represents 32% of global inequality", width = 20), size = 3) +
theme(legend.position="bottom")
Interpretation. The importance of between-country
inequality in overall global inequality, as measured by the Theil index,
rose between 1820 and 1980 and strongly declined since then. In 2020,
between-country inequality makes-up about a third of global inequality
between individuals. The rest is due to inequality within countries.
Income is measured per capita after pension and unemployment insurance
transfers and before income and wealth taxes.
Sources and series: wir2022.wid.world/methodology and
Chancel and Piketty (2021).
We use geom_area.
df_f6 %>% select(year = "...1", 2:3) %>%
pivot_longer(cols = 2:3, names_to = "type", values_to = "value") %>%
ggplot(aes(x = year, y = value, fill = type)) +
geom_area() +
scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
labs(title = "Figure 6. Global income inequality: \nBetween vs. within country inequality (Theil index), 1820-2020",
x = "", y = "Share of global inequality (% of total Theil index)")
Since the order of the two groups is different, the following line is added.
scale_fill_manual(values=rev(scales::hue_pal()(2)), labels = function(x) str_wrap(x, width = 15))
The second option is to control the legend to wrap.
Annotation can be omitted if we use RMarkdown to explain the charts clearly.
df_f6 %>% select(year = "...1", 2:3) %>%
pivot_longer(cols = 2:3, names_to = "type", values_to = "value") %>%
mutate(types = factor(type, levels = c("Within-country inequality", "Between-country inequality"))) %>%
ggplot(aes(x = year, y = value, fill = types)) +
geom_area() +
scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
scale_x_continuous(breaks = round(seq(1820, 2020, by = 20),1)) +
scale_fill_manual(values=rev(scales::hue_pal()(2)), labels = function(x) str_wrap(x, width = 15)) +
labs(title = "Figure 6. Global income inequality: \nBetween vs. within country inequality (Theil index), 1820-2020",
x = "", y = "Share of global inequality (% of total Theil index)", fill = "")
df_f7 <- read_excel("data/WIR2022TablesFigures-Summary.xlsx", sheet = "data-F7")
df_f7
df_f7 %>% select(year = y, 2:4) %>%
pivot_longer(cols = 2:4, names_to = "type", values_to = "value") %>%
ggplot(aes(x = year, y = value, color = type)) +
stat_smooth(span = 0.25, se = FALSE) +
scale_x_continuous(breaks = round(seq(1820, 2020, by = 20),1)) +
scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
labs(title = "Figure 7. Global income inequality, 1820-2020",
x = "", y = " Share of total world income (%)", color = "") +
annotate("text", x = 1980, y = 0.20, label = stringr::str_wrap("The global bottom 50% income share remains historically low despite growth in the emerging world in the past decades.", width = 30), size = 3)
Interpretation. The share of global income going to top 10% highest incomes at the world level has fluctuated around 50-60% between 1820 and 2020 (50% in 1820, 60% in 1910, 56% in 1980, 61% in 2000, 55% in 2020), while the share going to the bottom 50% lowest incomes has generally been around or below 10% (14% in 1820, 7% in 1910, 5% in 1980, 6% in 2000, 7% in 2020). Global inequality has always been very large. It rose between 1820 and 1910 and shows little long-run trend between 1910 and 2020. Distribution of per capita incomes. Sources and series: see wir2022.wid.world/methodology and Chancel and Piketty (2021).
Use pivot_longer to tidy the data.
df_f7 %>% select(year = y, 2:4) %>%
pivot_longer(cols = 2:4, names_to = "type", values_to = "value")
Use stat_smooth with span, and change the
scale of x-axis and y-axis.
df_f7 %>% select(year = y, 2:4) %>%
pivot_longer(cols = 2:4, names_to = "type", values_to = "value") %>%
ggplot(aes(x = year, y = value, color = type)) +
stat_smooth(span = 0.25, se = FALSE) +
scale_x_continuous(breaks = round(seq(1820, 2020, by = 20),1)) +
scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
labs(title = "Figure 7. Global income inequality, 1820-2020",
x = "", y = " Share of total world income (%)", color = "")
Finally, add the annotation to obtain the figure above.
df_f8 <- read_excel("data/WIR2022TablesFigures-Summary.xlsx", sheet = "data-F8")
df_f8
# Figure 8: public vs. private wealth for seven rich countries.
# Columns are renamed to <country>_<public|private> so that the two
# pivot_longer() calls can split them into `country` and `type`.
# Colour identifies the country, line type the kind of wealth.
df_f8 %>%
  select(
    year,
    Germany_public = Germany, Germany_private = "Germany (private)",
    Spain_public = Spain, Spain_private = "Spain (private)",
    France_public = France, France_private = "France (private)",
    UK_public = UK, UK_private = "UK (private)",
    Japan_public = Japan, Japan_private = "Japan (private)",
    Norway_public = Norway, Norway_private = "Norway (private)",
    USA_public = USA, USA_private = "USA (private)"
  ) %>%
  pivot_longer(!year, names_to = c("country", ".value"), names_sep = "_") %>%
  pivot_longer(3:4, names_to = "type", values_to = "value") %>%
  ggplot() +
  # `linewidth` replaces `size` for lines since ggplot2 3.4.0 (`size` warns).
  stat_smooth(
    aes(x = year, y = value, color = country, linetype = type),
    span = 0.25, se = FALSE, linewidth = 0.75
  ) +
  scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
  # The legend title is keyed by aesthetic name, so it is `linetype`, not `type`.
  labs(
    title = "Figure 8. The rise of private versus the decline of public wealth in rich countries, 1970-2020",
    x = "", y = "wealth as % of national income", color = "", linetype = ""
  )
Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
Please use `linewidth` instead.
Interpretation: Public wealth is the sum of all
financial and non-financial assets, net of debts, held by governments.
Public wealth dropped from 60% of national income in 1970 to -106% in
2020 in the UK.
Sources and series: wir2022.wid.world/methodology,
Bauluz et al. (2021) and updates.
There are two types of groups, countries and public vs private. So we change the column names first to identify that classification easily. We delete the last two columns “gwealAVGRICH” and “pwealAVGRICH” as they will not appear in the chart.
df_f8 %>%
select(year, Germany_public = Germany, Germany_private = 'Germany (private)',
Spain_public = Spain, Spain_private = 'Spain (private)',
France_public = France, France_private = 'France (private)',
UK_public = UK, UK_private = 'UK (private)',
Japan_public = Japan, Japan_private = 'Japan (private)',
Norway_public = Norway, Norway_private = 'Norway (private)',
USA_public = USA, USA_private = 'USA (private)')
First separate the countries into a new column.
df_f8 %>%
select(year, Germany_public = Germany, Germany_private = 'Germany (private)',
Spain_public = Spain, Spain_private = 'Spain (private)',
France_public = France, France_private = 'France (private)',
UK_public = UK, UK_private = 'UK (private)',
Japan_public = Japan, Japan_private = 'Japan (private)',
Norway_public = Norway, Norway_private = 'Norway (private)',
USA_public = USA, USA_private = 'USA (private)') %>%
pivot_longer(!year, names_to = c("country",".value"), names_sep = "_")
Next apply pivot_longer again to form the second
group.
df_f8 %>%
select(year, Germany_public = Germany, Germany_private = 'Germany (private)',
Spain_public = Spain, Spain_private = 'Spain (private)',
France_public = France, France_private = 'France (private)',
UK_public = UK, UK_private = 'UK (private)',
Japan_public = Japan, Japan_private = 'Japan (private)',
Norway_public = Norway, Norway_private = 'Norway (private)',
USA_public = USA, USA_private = 'USA (private)') %>%
pivot_longer(!year, names_to = c("country",".value"), names_sep = "_") %>%
pivot_longer(3:4, names_to = "type", values_to = "value")
Assign colors to countries and distinguish types by
linetype.
df_f8 %>%
select(year, Germany_public = Germany, Germany_private = 'Germany (private)',
Spain_public = Spain, Spain_private = 'Spain (private)',
France_public = France, France_private = 'France (private)',
UK_public = UK, UK_private = 'UK (private)',
Japan_public = Japan, Japan_private = 'Japan (private)',
Norway_public = Norway, Norway_private = 'Norway (private)',
USA_public = USA, USA_private = 'USA (private)') %>%
pivot_longer(!year, names_to = c("country",".value"), names_sep = "_") %>%
pivot_longer(3:4, names_to = "type", values_to = "value") %>%
ggplot() +
stat_smooth(aes(x = year, y = value, color = country, linetype = type), span = 0.25, se = FALSE)
Change the line width (linewidth, called size before ggplot2 3.4.0), change the y-axis to
percentages, and add the title.
# Last build-up step for Figure 8: thinner lines, percent y-axis, title and
# blank legend titles.
# Columns are renamed to <country>_<public|private> so that the two
# pivot_longer() calls can split them into `country` and `type`.
df_f8 %>%
  select(
    year,
    Germany_public = Germany, Germany_private = "Germany (private)",
    Spain_public = Spain, Spain_private = "Spain (private)",
    France_public = France, France_private = "France (private)",
    UK_public = UK, UK_private = "UK (private)",
    Japan_public = Japan, Japan_private = "Japan (private)",
    Norway_public = Norway, Norway_private = "Norway (private)",
    USA_public = USA, USA_private = "USA (private)"
  ) %>%
  pivot_longer(!year, names_to = c("country", ".value"), names_sep = "_") %>%
  pivot_longer(3:4, names_to = "type", values_to = "value") %>%
  ggplot() +
  # `linewidth` replaces `size` for lines since ggplot2 3.4.0 (`size` warns).
  stat_smooth(
    aes(x = year, y = value, color = country, linetype = type),
    span = 0.25, se = FALSE, linewidth = 0.75
  ) +
  scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
  # The legend title is keyed by aesthetic name, so it is `linetype`, not `type`.
  labs(
    title = "Figure 8. The rise of private versus the decline of public wealth in rich countries, 1970-2020",
    x = "", y = "wealth as % of national income", color = "", linetype = ""
  )
Let us check if we use only color for each group representing columns. This is not the chart we wanted to obtain.
# Counter-example for Figure 8: every column except `year` becomes its own
# colour group, with no country/type split.
df_f8 %>%
  pivot_longer(!year, names_to = "group", values_to = "value") %>%
  ggplot() +
  # `linewidth` replaces `size` for lines since ggplot2 3.4.0 (`size` warns).
  stat_smooth(
    aes(x = year, y = value, color = group),
    span = 0.25, se = FALSE, linewidth = 0.75
  ) +
  scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
  labs(
    title = "Figure 8. The rise of private versus the decline of public wealth in rich countries, 1970-2020",
    x = "", y = "wealth as % of national income", color = ""
  )
Original
F9_world_map
df_f9 <- read_excel("data/WIR2022TablesFigures-Summary.xlsx", sheet = "data-F9")
df_f9
df_f9 %>% select(p, wealth = 'Wealth growth 1995-2021') %>%
ggplot() +
stat_smooth(aes(x = p, y = wealth), span = 0.25, se = FALSE) +
# scale_x_continuous(breaks = round(c(seq(0, 90, by = 10), 99.9, 99.99, 99.999,3))) +
scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
labs(title = "Figure 9. Average annual wealth growth rate, 1995-2021",
x = "←1% poorest Global wealth group 0.001% richest→", y = "Per adult annual growth rate in wealth, net of inflation (%)", color = "")
Interpretation: Growth rates among the poorest half
of the population were between 3% and 4% per year, between 1995 and
2021. Since this group started from very low wealth levels, its absolute
levels of growth remained very low. The poorest half of the world
population only captured 2.3% of overall wealth growth since 1995. The
top 1% benefited from high growth rates (3% to 9% per year). This group
captured 38% of total wealth growth between 1995 and 2021. Net household
wealth is equal to the sum of financial assets (e.g. equity or bonds)
and non-financial assets (e.g. housing or land) owned by individuals,
net of their debts.
Sources and series: wir2022.wid.world/methodology.
df_f9 <- read_excel("data/WIR2022TablesFigures-Summary.xlsx", sheet = "data-F9")
df_f9
df_f9_rev <- df_f9 %>% mutate(pp = 1/(100-p))
df_f9_rev
df_f9_rev %>% select(pp, wealth = 'Wealth growth 1995-2021') %>%
ggplot() +
stat_smooth(aes(x = pp, y = wealth), span = 0.25, se = FALSE) +
# scale_x_continuous(breaks = round(c(seq(0, 90, by = 10), 99.9, 99.99, 99.999,3))) +
scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
labs(title = "Figure 9. Average annual wealth growth rate, 1995-2021",
x = "←1% poorest Global wealth group 0.001% richest→", y = "Per adult annual growth rate in wealth, net of inflation (%)", color = "")
df_f9 %>% select(p, wealth = 'Wealth growth 1995-2021') %>%
ggplot() +
geom_point(aes(x = p, y = wealth)) +
# scale_x_continuous(breaks = round(c(seq(0, 90, by = 10), 99.9, 99.99, 99.999,3))) +
scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
labs(title = "Figure 9. Average annual wealth growth rate, 1995-2021",
x = "←1% poorest Global wealth group 0.001% richest→", y = "Per adult annual growth rate in wealth, net of inflation (%)", color = "")
Not Yet
Note that the sheet name of F14 ends with a period:
summary_sheets[31] is "data-F14.".
df_f14 <- read_excel("data/WIR2022TablesFigures-Summary.xlsx", sheet = "data-F14.")
df_f14
Use \n for a line break in the title.
df_f14 %>%
ggplot(aes(x = Group, y = Share)) +
geom_col(width = 0.5, fill = scales::hue_pal()(1)[1]) +
scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
labs(title = "Figure 14. Global carbon inequality, \n2019 Group contribution to world emissions (%)",
x = "", y = "Share of world emissions (%)")
Interpretation: Personal carbon footprints include emissions from domestic consumption, public and private investments as well as imports and exports of carbon embedded in goods and services traded with the rest of the world. Modeled estimates based on the systematic combination of tax data, household surveys and input-output tables. Emissions split equally within households. Sources and series: wir2022.wid.world/methodology and Chancel (2021).
Not so difficult. You can also assign a color by name. See http://www.cookbook-r.com/Graphs/Colors_(ggplot2)/.
# Read the Figure 15 data sheet and print it.
df_f15 <- read_excel(
  path = "data/WIR2022TablesFigures-Summary.xlsx",
  sheet = "data-F15"
)
df_f15
# Figure 15: dodged bars of per-capita emissions by region and group.
# `regionWID` is only filled on some rows, so each non-missing name is
# repeated three times to label every row.
mutate(df_f15, region = rep(regionWID[!is.na(regionWID)], each = 3)) %>%
  select(region, group, tcap) %>%
  ggplot(aes(x = region, y = tcap, fill = group)) +
  geom_col(position = position_dodge()) +
  # Wrap long region names so the axis labels do not overlap.
  scale_x_discrete(labels = function(lbl) stringr::str_wrap(lbl, width = 10)) +
  labs(
    title = "Figure 15 Per capita emissions across the world, 2019",
    x = "",
    y = "tonnes of CO2e per person per year",
    fill = ""
  )
Interpretation: Personal carbon footprints include emissions from domestic consumption, public and private investments as well as imports and exports of carbon embedded in goods and services traded with the rest of the world. Modeled estimates based on the systematic combination of tax data, household surveys and input-output tables. Emissions split equally within households. Sources and series: wir2022.wid.world/methodology and Chancel (2021).
The sheet is laid out in Excel style, and there are missing values in the first column. In order to add a new column, let us check the following.
# Repeat each non-missing region name three times and print the vector.
region_test <- with(df_f15, rep(regionWID[!is.na(regionWID)], each = 3))
region_test
[1] "East Asia" "East Asia" "East Asia"
[4] "Europe" "Europe" "Europe"
[7] "North America" "North America" "North America"
[10] "South & South-East Asia" "South & South-East Asia" "South & South-East Asia"
[13] "Russia & Central Asia" "Russia & Central Asia" "Russia & Central Asia"
[16] "MENA" "MENA" "MENA"
[19] "Latin America" "Latin America" "Latin America"
[22] "Sub-Saharan Africa" "Sub-Saharan Africa" "Sub-Saharan Africa"
Add the region names as the last column with mutate,
and choose the columns to keep with select.
# Step 1: append the repeated region names as a new last column.
mutate(df_f15, region = rep(regionWID[!is.na(regionWID)], each = 3))
# Step 2: same as step 1, then keep only the three columns used in the chart.
mutate(df_f15, region = rep(regionWID[!is.na(regionWID)], each = 3)) %>%
  select(region, group, tcap)
Now it is not difficult to draw a chart.
# Combine the steps: label every row with its region, keep the plotted
# columns, and draw the dodged bar chart for Figure 15.
df_f15 %>%
  mutate(region = rep(regionWID[!is.na(regionWID)], each = 3)) %>%
  select(region, group, tcap) %>%
  ggplot() +
  geom_col(aes(x = region, y = tcap, fill = group), position = "dodge") +
  # Wrap long region names so the axis labels do not overlap.
  scale_x_discrete(labels = function(name) stringr::str_wrap(name, width = 10)) +
  labs(
    title = "Figure 15 Per capita emissions across the world, 2019",
    x = "",
    y = "tonnes of CO2e per person per year",
    fill = ""
  )
Check the format of the sheet and add
skip=4, n_max=7.
# Read Table 1: skip the first four rows of the sheet and read at most
# seven data rows, then print the table.
df_t1 <- read_excel(
  path = "data/WIR2022TablesFigures-Summary.xlsx",
  sheet = "T1",
  skip = 4,
  n_max = 7
)
df_t1
Interpretation: In 2021, there were 62.2 million people in the world owning more than $1 million (measured at Market Exchange Rates). Their average wealth was $ 2.8 million, representing a total of $174 trillion. In our Tax scenario 2, a global progressive wealth tax would yield 2.1% of global income, taking into account capital depreciation and evasion. Sources and series: wir2022.wid.world/methodology.
WID is a package in the development stage, and it is not
available on CRAN. So we need a special way to install the package.
To install, run the following code in the Console. If you are prompted to update packages, enter 1 to select ‘All’.
For references use ‘?download_wid’ or put ‘download_wid’ in the search box under Help.
It is similar to WDI. For more detail and examples, see
vignettes.
library("wid")
We now plot the evolution of average net national income per adult in Japan, France, Germany, the United Kingdom and the United States.
# Download average national income per adult for five countries and convert
# it from local currency to 2016 PPP US dollars.

# Average national income data
data <- download_wid(
  indicators = "anninc", # Average net national income
  areas = c("JP", "FR", "US", "DE", "GB"),
  ages = 992 # Adults
) %>%
  rename(value_lcu = value)

# Purchasing power parities with US dollar
ppp <- download_wid(
  indicators = "xlcusp", # US PPP
  areas = c("JP", "FR", "US", "DE", "GB"), # Same five areas as above
  years = 2016 # Reference year only; full argument name, no partial matching
) %>%
  rename(ppp = value) %>%
  # Drop year and percentile so they do not clash with the income columns.
  select(-year, -percentile)

# Convert from local currency to PPP US dollar and keep the years 1950-2021
data <- merge(data, ppp, by = "country") %>%
  mutate(value_ppp = value_lcu / ppp) %>%
  filter(year %in% 1950:2021)
# Line chart of PPP-converted income by year on a log10 y-axis, one line per
# country. Colour and linetype carry the same labels.
ggplot(data, aes(x = year, y = value_ppp, color = country, linetype = country)) +
  geom_line() +
  scale_y_log10(breaks = c(2000, 5000, 10000, 20000, 50000)) +
  scale_color_discrete(
    labels = c("JP" = "Japan", "US" = "USA", "FR" = "France", "DE" = "Germany", "GB" = "UK")
  ) +
  scale_linetype_discrete(
    labels = c("JP" = "Japan", "US" = "USA", "FR" = "France", "DE" = "Germany", "GB" = "UK")
  ) +
  labs(title = "Average net national income per adult", y = "2016 $ PPP")